import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Comment this out if the data visualisations don't work
%matplotlib inline
# Use the 'bmh' matplotlib style sheet for every figure in the notebook.
plt.style.use('bmh')
# NOTE(review): absolute, machine-specific path -- this cell only runs where
# that exact file exists.
df = pd.read_csv("/Users/Prosenjeet Saha/Desktop/Data/Gold Price Data.csv")
df.head()
| Date | Open | High | Low | Close | WAP | No. of Shares | No. of Trades | Total Turnover | Deliverable Quantity | % Deli. Qty to Traded Qty | Spread H-L | Spread C-O | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2017-02-06 | 0.79 | 0.79 | 0.76 | 0.76 | 0.79 | 7430.0 | 7.0 | 5848.0 | 7430.0 | 100.0 | 0.03 | -0.03 |
| 1 | 2017-02-03 | 0.79 | 0.79 | 0.79 | 0.79 | 0.79 | 310.0 | 4.0 | 244.0 | 310.0 | 100.0 | 0.00 | 0.00 |
| 2 | 2017-02-02 | 0.83 | 0.83 | 0.83 | 0.83 | 0.83 | 75.0 | 1.0 | 62.0 | 75.0 | 100.0 | 0.00 | 0.00 |
| 3 | 2017-01-31 | 0.87 | 0.87 | 0.87 | 0.87 | 0.87 | 1050.0 | 2.0 | 913.0 | 1050.0 | 100.0 | 0.00 | 0.00 |
| 4 | 2017-01-25 | 0.91 | 0.91 | 0.91 | 0.91 | 0.91 | 400.0 | 1.0 | 364.0 | 400.0 | 100.0 | 0.00 | 0.00 |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1660 entries, 0 to 1659 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 1660 non-null object 1 Open 1660 non-null float64 2 High 1660 non-null float64 3 Low 1660 non-null float64 4 Close 1660 non-null float64 5 WAP 1660 non-null float64 6 No. of Shares 1660 non-null float64 7 No. of Trades 1660 non-null float64 8 Total Turnover 1660 non-null float64 9 Deliverable Quantity 1660 non-null float64 10 % Deli. Qty to Traded Qty 1660 non-null float64 11 Spread H-L 1660 non-null float64 12 Spread C-O 1660 non-null float64 dtypes: float64(12), object(1) memory usage: 168.7+ KB
df.describe()
# Distribution of a single price column. Passing ``df.describe()`` here would
# histogram the summary statistics (count, mean, std, ...) of every column
# mixed together, which is not a meaningful distribution.
# ``histplot`` with kde + density scaling replaces the deprecated ``distplot``.
# NOTE(review): 'Close' is assumed to be the column of interest -- confirm.
plt.figure(figsize=(9, 8))
sns.histplot(df['Close'], color='g', bins=100, alpha=0.4, kde=True, stat='density');
C:\Users\Prosenjeet Saha\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)
#Numerical data distribution
list(set(df.dtypes.tolist()))
[dtype('O'), dtype('float64')]
# Numeric subset of the frame: 64-bit float and integer columns only
df_num = df.select_dtypes(
    include=['float64', 'int64'],
)
df_num.head(5)
| Open | High | Low | Close | WAP | No. of Shares | No. of Trades | Total Turnover | Deliverable Quantity | % Deli. Qty to Traded Qty | Spread H-L | Spread C-O | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.79 | 0.79 | 0.76 | 0.76 | 0.79 | 7430.0 | 7.0 | 5848.0 | 7430.0 | 100.0 | 0.03 | -0.03 |
| 1 | 0.79 | 0.79 | 0.79 | 0.79 | 0.79 | 310.0 | 4.0 | 244.0 | 310.0 | 100.0 | 0.00 | 0.00 |
| 2 | 0.83 | 0.83 | 0.83 | 0.83 | 0.83 | 75.0 | 1.0 | 62.0 | 75.0 | 100.0 | 0.00 | 0.00 |
| 3 | 0.87 | 0.87 | 0.87 | 0.87 | 0.87 | 1050.0 | 2.0 | 913.0 | 1050.0 | 100.0 | 0.00 | 0.00 |
| 4 | 0.91 | 0.91 | 0.91 | 0.91 | 0.91 | 400.0 | 1.0 | 364.0 | 400.0 | 100.0 | 0.00 | 0.00 |
df_num.hist(figsize=(16, 20), bins=50, xlabelsize=8, ylabelsize=8); # the trailing ';' suppresses matplotlib's verbose text output
#checking the data types
df.dtypes
Date object Open float64 High float64 Low float64 Close float64 WAP float64 No. of Shares float64 No. of Trades float64 Total Turnover float64 Deliverable Quantity float64 % Deli. Qty to Traded Qty float64 Spread H-L float64 Spread C-O float64 dtype: object
#Dropping the duplicate rows
df.shape
(1660, 13)
# Rows flagged by ``duplicated()``; ``.shape`` is (row count, column count)
duplicate_rows_df = df.loc[df.duplicated()]
print("number of duplicate rows: ", duplicate_rows_df.shape)
number of duplicate rows: (0, 13)
#Now let us remove the duplicate data because it's ok to remove them
df.count() # Counts the non-null values in each column
Date 1660 Open 1660 High 1660 Low 1660 Close 1660 WAP 1660 No. of Shares 1660 No. of Trades 1660 Total Turnover 1660 Deliverable Quantity 1660 % Deli. Qty to Traded Qty 1660 Spread H-L 1660 Spread C-O 1660 dtype: int64
df = df.drop_duplicates()
df.head(5)
| Date | Open | High | Low | Close | WAP | No. of Shares | No. of Trades | Total Turnover | Deliverable Quantity | % Deli. Qty to Traded Qty | Spread H-L | Spread C-O | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2017-02-06 | 0.79 | 0.79 | 0.76 | 0.76 | 0.79 | 7430.0 | 7.0 | 5848.0 | 7430.0 | 100.0 | 0.03 | -0.03 |
| 1 | 2017-02-03 | 0.79 | 0.79 | 0.79 | 0.79 | 0.79 | 310.0 | 4.0 | 244.0 | 310.0 | 100.0 | 0.00 | 0.00 |
| 2 | 2017-02-02 | 0.83 | 0.83 | 0.83 | 0.83 | 0.83 | 75.0 | 1.0 | 62.0 | 75.0 | 100.0 | 0.00 | 0.00 |
| 3 | 2017-01-31 | 0.87 | 0.87 | 0.87 | 0.87 | 0.87 | 1050.0 | 2.0 | 913.0 | 1050.0 | 100.0 | 0.00 | 0.00 |
| 4 | 2017-01-25 | 0.91 | 0.91 | 0.91 | 0.91 | 0.91 | 400.0 | 1.0 | 364.0 | 400.0 | 100.0 | 0.00 | 0.00 |
df.count()
Date 1660 Open 1660 High 1660 Low 1660 Close 1660 WAP 1660 No. of Shares 1660 No. of Trades 1660 Total Turnover 1660 Deliverable Quantity 1660 % Deli. Qty to Traded Qty 1660 Spread H-L 1660 Spread C-O 1660 dtype: int64
#Dropping the missing or null values
print(df.isnull().sum())
Date 0 Open 0 High 0 Low 0 Close 0 WAP 0 No. of Shares 0 No. of Trades 0 Total Turnover 0 Deliverable Quantity 0 % Deli. Qty to Traded Qty 0 Spread H-L 0 Spread C-O 0 dtype: int64
#Detecting Outliers
sns.boxplot(x=df['Open'])
<Axes: xlabel='Open'>
sns.boxplot(x=df['High'])
<Axes: xlabel='High'>
sns.boxplot(x=df['Low'])
<Axes: xlabel='Low'>
sns.boxplot(x=df['Close'])
<Axes: xlabel='Close'>
sns.boxplot(x=df['No. of Shares'])
<Axes: xlabel='No. of Shares'>
sns.boxplot(x=df['No. of Trades'])
<Axes: xlabel='No. of Trades'>
sns.boxplot(x=df['Total Turnover'])
<Axes: xlabel='Total Turnover'>
sns.boxplot(x=df['Deliverable Quantity'])
<Axes: xlabel='Deliverable Quantity'>
# Inter-quartile range of every numeric column. Non-numeric columns such as
# 'Date' have no quantiles, so they are excluded explicitly instead of relying
# on pandas to drop them silently.
numeric_df = df.select_dtypes(include='number')
Q1 = numeric_df.quantile(0.25)
Q3 = numeric_df.quantile(0.75)
IQR = Q3 - Q1
print(IQR)
Open 1.004250e+01 High 1.056000e+01 Low 9.930000e+00 Close 1.002750e+01 WAP 1.011550e+01 No. of Shares 1.490235e+05 No. of Trades 1.712500e+02 Total Turnover 2.050616e+06 Deliverable Quantity 1.184230e+05 % Deli. Qty to Traded Qty 2.106750e+01 Spread H-L 8.500000e-01 Spread C-O 1.800000e-01 dtype: float64
# Drop every row holding at least one outlier (outside Q1/Q3 -/+ 1.5 * IQR).
# The comparison is restricted to the columns the quartiles were computed for,
# so the frame and the fence Series line up column by column without the
# deprecated automatic reindexing of a DataFrame-vs-Series comparison.
lower_fence = Q1 - 1.5 * IQR
upper_fence = Q3 + 1.5 * IQR
fenced = df[IQR.index]
is_outlier = fenced.lt(lower_fence, axis='columns') | fenced.gt(upper_fence, axis='columns')
df = df[~is_outlier.any(axis=1)]
df.shape
C:\Users\Prosenjeet Saha\AppData\Local\Temp\ipykernel_12144\4147643536.py:1: FutureWarning: Automatic reindexing on DataFrame vs Series comparisons is deprecated and will raise ValueError in a future version. Do `left, right = left.align(right, axis=1, copy=False)` before e.g. `left == right` df = df[~((df < (Q1 - 1.5 * IQR)) |(df > (Q3 + 1.5 * IQR))).any(axis=1)]
(1132, 13)
#Plot different features against one another (scatter), against frequency (histogram)
df.hist()
array([[<Axes: title={'center': 'Open'}>,
<Axes: title={'center': 'High'}>,
<Axes: title={'center': 'Low'}>],
[<Axes: title={'center': 'Close'}>,
<Axes: title={'center': 'WAP'}>,
<Axes: title={'center': 'No. of Shares'}>],
[<Axes: title={'center': 'No. of Trades'}>,
<Axes: title={'center': 'Total Turnover'}>,
<Axes: title={'center': 'Deliverable Quantity'}>],
[<Axes: title={'center': '% Deli. Qty to Traded Qty'}>,
<Axes: title={'center': 'Spread H-L'}>,
<Axes: title={'center': 'Spread C-O'}>]], dtype=object)
#plotting Heat Maps
# Pairwise correlation of the numeric columns, drawn as an annotated heat map.
# The frame is narrowed to numeric columns first because 'Date' holds strings
# and cannot take part in a correlation.
plt.figure(figsize=(10,5))
c = df.select_dtypes(include='number').corr()
sns.heatmap(c, cmap="BrBG", annot=True)
c
| Open | High | Low | Close | WAP | No. of Shares | No. of Trades | Total Turnover | Deliverable Quantity | % Deli. Qty to Traded Qty | Spread H-L | Spread C-O | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Open | 1.000000 | 0.999359 | 0.999108 | 0.999692 | 0.999519 | 0.424709 | 0.235035 | 0.588435 | 0.433811 | -0.339378 | 0.803052 | -0.169538 |
| High | 0.999359 | 1.000000 | 0.999021 | 0.999413 | 0.999460 | 0.420926 | 0.235375 | 0.585213 | 0.430000 | -0.338162 | 0.813143 | -0.155049 |
| Low | 0.999108 | 0.999021 | 1.000000 | 0.999322 | 0.999504 | 0.410888 | 0.224782 | 0.575277 | 0.420581 | -0.328516 | 0.786593 | -0.148677 |
| Close | 0.999692 | 0.999413 | 0.999322 | 1.000000 | 0.999716 | 0.418452 | 0.231795 | 0.582777 | 0.428262 | -0.333555 | 0.800995 | -0.145009 |
| WAP | 0.999519 | 0.999460 | 0.999504 | 0.999716 | 1.000000 | 0.417035 | 0.230715 | 0.581522 | 0.426852 | -0.332976 | 0.799259 | -0.149420 |
| No. of Shares | 0.424709 | 0.420926 | 0.410888 | 0.418452 | 0.417035 | 1.000000 | 0.602534 | 0.921316 | 0.974295 | -0.697808 | 0.468916 | -0.315107 |
| No. of Trades | 0.235035 | 0.235375 | 0.224782 | 0.231795 | 0.230715 | 0.602534 | 1.000000 | 0.541898 | 0.574102 | -0.528441 | 0.327714 | -0.165539 |
| Total Turnover | 0.588435 | 0.585213 | 0.575277 | 0.582777 | 0.581522 | 0.921316 | 0.541898 | 1.000000 | 0.906034 | -0.638952 | 0.599039 | -0.317087 |
| Deliverable Quantity | 0.433811 | 0.430000 | 0.420581 | 0.428262 | 0.426852 | 0.974295 | 0.574102 | 0.906034 | 1.000000 | -0.568492 | 0.468032 | -0.288441 |
| % Deli. Qty to Traded Qty | -0.339378 | -0.338162 | -0.328516 | -0.333555 | -0.332976 | -0.697808 | -0.528441 | -0.638952 | -0.568492 | 1.000000 | -0.397517 | 0.284462 |
| Spread H-L | 0.803052 | 0.813143 | 0.786593 | 0.800995 | 0.799259 | 0.468916 | 0.327714 | 0.599039 | 0.468032 | -0.397517 | 1.000000 | -0.207906 |
| Spread C-O | -0.169538 | -0.155049 | -0.148677 | -0.145009 | -0.149420 | -0.315107 | -0.165539 | -0.317087 | -0.288441 | 0.284462 | -0.207906 | 1.000000 |
#Scatterplot
#We generally use scatter plots to find the correlation between two variables.
#Here the scatter plot is drawn between the Open and High prices,
#as shown in the plot below. With the plot given below,
#we can easily draw a trend line. These features provide a good scattering of points.
# Opening price on the x-axis against the daily high on the y-axis
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(x=df['Open'], y=df['High'])
ax.set(xlabel='Open', ylabel='High')
plt.show()
# Visualise missing values with missingno's matrix plot.
# NOTE(review): the recorded traceback shows missingno calling
# ``ax0.grid(b=False)``, which the installed matplotlib rejects
# ("keyword grid_b is not recognized"). That looks like a missingno/matplotlib
# version mismatch rather than a problem with this call -- presumably a newer
# missingno release resolves it; confirm.
import missingno as msno
msno.matrix(df, labels=True, sort="descending");
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) ~\AppData\Local\Temp\ipykernel_12144\3630483539.py in <module> 1 import missingno as msno 2 ----> 3 msno.matrix(df, labels=True, sort="descending"); ~\anaconda3\lib\site-packages\missingno\missingno.py in matrix(df, filter, n, p, sort, figsize, width_ratios, color, fontsize, labels, label_rotation, sparkline, freq, ax) 71 # Remove extraneous default visual elements. 72 ax0.set_aspect('auto') ---> 73 ax0.grid(b=False) 74 ax0.xaxis.tick_top() 75 ax0.xaxis.set_ticks_position('none') ~\anaconda3\lib\site-packages\matplotlib\axes\_base.py in grid(self, visible, which, axis, **kwargs) 3194 _api.check_in_list(['x', 'y', 'both'], axis=axis) 3195 if axis in ['x', 'both']: -> 3196 self.xaxis.grid(visible, which=which, **kwargs) 3197 if axis in ['y', 'both']: 3198 self.yaxis.grid(visible, which=which, **kwargs) ~\anaconda3\lib\site-packages\matplotlib\axis.py in grid(self, visible, which, **kwargs) 1653 gridkw['gridOn'] = (not self._major_tick_kw['gridOn'] 1654 if visible is None else visible) -> 1655 self.set_tick_params(which='major', **gridkw) 1656 self.stale = True 1657 ~\anaconda3\lib\site-packages\matplotlib\axis.py in set_tick_params(self, which, reset, **kwargs) 925 """ 926 _api.check_in_list(['major', 'minor', 'both'], which=which) --> 927 kwtrans = self._translate_tick_params(kwargs) 928 929 # the kwargs are stored in self._major/minor_tick_kw so that any ~\anaconda3\lib\site-packages\matplotlib\axis.py in _translate_tick_params(kw, reverse) 1069 for key in kw_: 1070 if key not in allowed_keys: -> 1071 raise ValueError( 1072 "keyword %s is not recognized; valid keywords are %s" 1073 % (key, allowed_keys)) ValueError: keyword grid_b is not recognized; valid keywords are ['size', 'width', 'color', 'tickdir', 'pad', 'labelsize', 'labelcolor', 'zorder', 'gridOn', 'tick1On', 'tick2On', 'label1On', 'label2On', 'length', 'direction', 'left', 'bottom', 
'right', 'top', 'labelleft', 'labelbottom', 'labelright', 'labeltop', 'labelrotation', 'grid_agg_filter', 'grid_alpha', 'grid_animated', 'grid_antialiased', 'grid_clip_box', 'grid_clip_on', 'grid_clip_path', 'grid_color', 'grid_dash_capstyle', 'grid_dash_joinstyle', 'grid_dashes', 'grid_data', 'grid_drawstyle', 'grid_figure', 'grid_fillstyle', 'grid_gapcolor', 'grid_gid', 'grid_in_layout', 'grid_label', 'grid_linestyle', 'grid_linewidth', 'grid_marker', 'grid_markeredgecolor', 'grid_markeredgewidth', 'grid_markerfacecolor', 'grid_markerfacecoloralt', 'grid_markersize', 'grid_markevery', 'grid_mouseover', 'grid_path_effects', 'grid_picker', 'grid_pickradius', 'grid_rasterized', 'grid_sketch_params', 'grid_snap', 'grid_solid_capstyle', 'grid_solid_joinstyle', 'grid_transform', 'grid_url', 'grid_visible', 'grid_xdata', 'grid_ydata', 'grid_zorder', 'grid_aa', 'grid_c', 'grid_ds', 'grid_ls', 'grid_lw', 'grid_mec', 'grid_mew', 'grid_mfc', 'grid_mfcalt', 'grid_ms']
# Plot each column against the row index as unconnected dots (no line,
# '.' markers), one subplot per column, four subplots per row.
df.plot(lw=0, marker=".", subplots=True, layout=(-1, 4),
figsize=(15, 30), markersize=1);
# Extract descriptive properties of non-numerical features
df.describe(exclude=["number", "datetime"])
| Date | |
|---|---|
| count | 1132 |
| unique | 1132 |
| top | 2017-02-06 |
| freq | 1 |
# Identify non-numerical features
df_non_numerical = df.select_dtypes(exclude=["number", "datetime"])

# One subplot per non-numerical feature. Sizing the grid from the data avoids
# empty axes when there are fewer than three such features and silently
# skipped features when there are more.
n_features = len(df_non_numerical.columns)
if n_features:
    # squeeze=False keeps ``axes`` two-dimensional even for a single subplot
    fig, axes = plt.subplots(
        ncols=1, nrows=n_features, figsize=(12, 3 * n_features), squeeze=False)

    # Loop through features and put each subplot on a matplotlib axis object
    for col, ax in zip(df_non_numerical.columns, axes.ravel()):
        # Count the occurrences of each unique value and plot those counts
        # on a log-scaled y-axis
        df_non_numerical[col].value_counts().plot(
            logy=True, title=col, lw=0, marker=".", ax=ax)

    plt.tight_layout()
# Plots the histogram for each numerical feature in a separate subplot
# (25 bins each, five subplots per row, black bar edges)
df.hist(bins=25, figsize=(15, 25), layout=(-1, 5), edgecolor="black")
plt.tight_layout();
# Collects for each feature the most frequent entry. ``mode()`` returns one
# row per tied value, so a feature whose values are all unique (here 'Date')
# makes it return many rows; only the first row holds a single
# "most frequent" value per feature.
most_frequent_entry = df.mode().iloc[0]

# Checks for each entry if it equals the most frequent entry of its feature
# (the Series is matched to the columns by name)
df_freq = df.eq(most_frequent_entry, axis=1)

# Computes the mean of the 'is_most_frequent' occurrence
df_freq = df_freq.mean().sort_values(ascending=False)

# Show the 5 top features with the highest ratio of singular value content
display(df_freq.head())

# Visualize the 'df_freq' table
df_freq.plot.bar(figsize=(15, 4));
% Deli. Qty to Traded Qty 0.000883 Date 0.000000 Open 0.000000 High 0.000000 Low 0.000000 dtype: float64
# Boolean Series indexed by numeric column name: True where the column holds
# at least 25 distinct values
cols_continuous = df.select_dtypes(include="number").nunique().ge(25)

# New dataframe restricted to the columns flagged as continuous
df_continuous = df.loc[:, cols_continuous.index[cols_continuous]]
df_continuous.shape
(1132, 12)
import seaborn as sns
sns.pairplot(df_continuous, height=1.5, plot_kws={"s": 2, "alpha": 0.2});
# New dataframe holding the numeric columns that were NOT flagged as continuous
df_discrete = df.loc[:, cols_continuous.index[~cols_continuous]]
df_discrete.shape
(1132, 0)
# Computes feature correlation. Only numeric features are correlated: the
# 'Date' column holds strings and cannot take part in a Pearson correlation.
df_corr = df.select_dtypes(include="number").corr(method="pearson")

# Create labels for the correlation matrix from the absolute correlation:
# "S" above 0.75, "M" above 0.5, "W" above 0.25, otherwise blank
import numpy as np
labels = np.where(np.abs(df_corr) > 0.75, "S",
                  np.where(np.abs(df_corr) > 0.5, "M",
                           np.where(np.abs(df_corr) > 0.25, "W", "")))

# Plot correlation matrix (the identity mask hides the diagonal)
plt.figure(figsize=(15, 15))
sns.heatmap(df_corr, mask=np.eye(len(df_corr)), square=True,
            center=0, annot=labels, fmt='', linewidths=.5,
            cmap="vlag", cbar_kws={"shrink": 0.8});
# Boolean mask that is True strictly below the diagonal, i.e. it removes the
# diagonal and the upper triangle
lower_triangle_mask = np.tril(np.ones(df_corr.shape, dtype="bool"), k=-1)

# Apply the mask, stack the remaining correlations and sort them ascending
df_corr_stacked = (
    df_corr.where(lower_triangle_mask)
    .stack()
    .sort_values()
)

# Showing the lowest and highest correlations in the correlation matrix
display(df_corr_stacked)
% Deli. Qty to Traded Qty No. of Shares -0.697808
Total Turnover -0.638952
Deliverable Quantity -0.568492
No. of Trades -0.528441
Spread H-L % Deli. Qty to Traded Qty -0.397517
...
WAP High 0.999460
Low 0.999504
Open 0.999519
Close Open 0.999692
WAP Close 0.999716
Length: 66, dtype: float64
#correlation matrix (numeric columns only; 'Date' holds strings)
corrmat = df.select_dtypes(include='number').corr()
f, ax = plt.subplots(figsize=(30, 30))
sns.heatmap(corrmat, vmax=.8, square=True);
#Bivariate Analysis
import seaborn as sns
sns.pairplot(df)
plt.show()
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(rc={'figure.figsize':(16, 4)})
plt.rcParams['figure.dpi'] = 150
df['High'].plot(linewidth=0.4)
<Axes: >
df['Low'].plot(linewidth=0.4)
<Axes: >
df.plot(linewidth=0.4)
<Axes: >
df['Open'].plot(linewidth=0.4)
<Axes: >
df['Close'].plot(linewidth=0.4)
<Axes: >
# One marker-only subplot per price column, each with the same y-axis label
cols_to_plot = ['Open', 'High', 'Low']
axes = df.loc[:, cols_to_plot].plot(
    subplots=True,
    figsize=(14, 7),
    linestyle='None',
    marker='.',
    alpha=0.5,
)
for ax in axes:
    ax.set_ylabel('Daily Totals Stock')
# Box plots of the price columns grouped by calendar month. Grouping by the
# raw 'Date' column would draw one degenerate box per row, because every date
# occurs exactly once in the frame.
# NOTE(review): monthly grouping is assumed to be the intent -- confirm.
month = pd.to_datetime(df['Date']).dt.month.rename('Month')
fig, axes = plt.subplots(3, 1, figsize=(8, 7), sharex=True)
for name, ax in zip(['High', 'Open', 'Low'], axes):
    sns.boxplot(x=month, y=df[name], ax=ax)
    ax.set_ylabel('Stock')
    ax.set_title(name)
    # Only the bottom plot keeps its x-axis label
    if ax is not axes[-1]:
        ax.set_xlabel('')
import networkx as nx
# Skewness of each numeric column, largest first. numeric_only is passed
# explicitly because relying on pandas to drop the non-numeric 'Date' column
# is deprecated.
df.skew(numeric_only=True).sort_values(ascending=False)
C:\Users\Prosenjeet Saha\AppData\Local\Temp\ipykernel_12144\4024944668.py:1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction. df.skew().sort_values(ascending=False)
Spread H-L 2.448261 Total Turnover 2.253303 Low 2.103139 WAP 2.059310 High 2.055368 Close 2.049954 Open 2.035239 No. of Shares 1.880386 Deliverable Quantity 1.744694 No. of Trades 1.614688 Spread C-O -0.764600 % Deli. Qty to Traded Qty -2.123731 dtype: float64
# Log transform: natural log of 'Low', then the skewness of the result
# NOTE(review): assumes every 'Low' value is strictly positive -- confirm.
Low_log=np.log(df['Low'])
Low_log.skew()
0.6786644822512088
# Square root transform: square root of 'Low', then the skewness of the result
Low_sqrt=np.sqrt(df['Low'])
Low_sqrt.skew()
1.2837203476217665
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.preprocessing import PowerTransformer, QuantileTransformer
cols1 = ["Low", "High", "Close"]
def test_transformers(columns, data=None):
    """Plot raw, quantile-transformed and power-transformed histograms.

    For every name in *columns* one row of three histograms is drawn: the raw
    values, the values after a ``QuantileTransformer`` with a normal output
    distribution, and the values after a ``PowerTransformer``.

    Args:
        columns: Iterable of column names to visualise.
        data: Frame to read the columns from. When omitted, the notebook-level
            ``df`` is used.

    Returns:
        None. The figure is created as a side effect.
    """
    if data is None:
        data = df
    columns = list(columns)
    if not columns:
        # Nothing to draw; avoid creating a zero-row subplot grid.
        return

    pt = PowerTransformer()
    # Never ask for more quantiles than there are samples.
    qt = QuantileTransformer(n_quantiles=min(500, max(len(data), 1)),
                             output_distribution='normal')

    # One row of three panels per column, so any number of columns fits
    # (a fixed 3x3 grid only has room for three).
    n_rows = len(columns)
    fig, axes = plt.subplots(n_rows, 3, figsize=(20, 10 * n_rows), squeeze=False)
    for row_axes, name in zip(axes, columns):
        # The transformers are fed a single-feature 2-D array
        values = np.array(data[name]).reshape(-1, 1)
        power = pt.fit_transform(values)
        quantile = qt.fit_transform(values)
        panels = (
            (values, f"Original Distribution for {name}"),
            (quantile, f"Quantile Transform for {name}"),
            (power, f"Power Transform for {name}"),
        )
        for ax, (sample, title) in zip(row_axes, panels):
            sns.histplot(sample, bins=50, kde=True, ax=ax)
            ax.set_title(title)
test_transformers(cols1)
from matplotlib import pyplot
df.plot()
pyplot.show()
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import ols
import seaborn as sns
import numpy as np
import pandas.tseries
plt.style.use('fivethirtyeight')
# Understanding the distribution of the daily low price.
# ``histplot`` with kde + density scaling replaces the deprecated ``distplot``.
# The title and y-label now describe what is drawn: the density of 'Low'.
f, ax = plt.subplots(figsize=(11, 9))
sns.histplot(df['Low'], kde=True, stat='density', ax=ax)
ax.set_title('Low Price Distribution among Sample')
ax.set_ylabel('Density')
plt.show()
C:\Users\Prosenjeet Saha\anaconda3\lib\site-packages\seaborn\distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms). warnings.warn(msg, FutureWarning)